MadDF <- filter(FullDF,Site=="Madison")%>%
    mutate(FlagedOutliers = IdentifyOutliers(N1,Bin = 21, Action = "Flag"),
           #Manual flagging that method misses due to boundary effect with binning
           FlagedOutliers = ifelse(Date == mdy("01/26/2021") | Date == mdy("01/27/2021"),
                                   TRUE, FlagedOutliers),
           NoOutlierVar = ifelse(FlagedOutliers, NA, N1))

OutlierDF <- MadDF%>%
  filter(FlagedOutliers)

FullDFMassRatio <- FullDF%>%
  filter(!is.na(N1))%>%
  mutate(SC2_mass = (3.785*1e6)*FlowRate*N1)


MissingIntercepter <- FullDFMassRatio%>%
  filter(Site!="Madison",!is.na(FlowRate)|!is.na(N1))%>%
  group_by(Date)%>%
  summarize(N = n())%>%
  filter(N!=5)


FullDFMassRatio.Mad <- FullDFMassRatio%>%
  filter(!is.na(SC2_mass))%>%
  filter(Site=="Madison")
FullDFMassRatio.Inter <- FullDFMassRatio%>%
  filter(!is.na(SC2_mass))%>%
  filter(Site!="Madison")#TempErrorMetric

TempErrorMetric <- FullDFMassRatio.Inter%>%
  group_by(Date)%>%
  summarise(InterSum = sum(SC2_mass),InterSumFlow = sum(FlowRate))%>%
  inner_join(FullDFMassRatio.Mad)%>%
  mutate(MassRatio = SC2_mass/InterSum,
         ErrorRatio = 2*(SC2_mass-InterSum)/(SC2_mass+InterSum),
         MassRatioFlow = FlowRate/InterSumFlow,
         ErrorRatioFlow = 2*(FlowRate-InterSumFlow)/(FlowRate+InterSumFlow))


FullDFMassRatio.Mad <- FullDFMassRatio%>%
  filter(!is.na(SC2_mass))%>%
  filter(Site=="Madison")%>%
  inner_join(FullDFMassRatio.Inter["Date"])%>%
  unique()

FullDFMassRatio.Inter <- FullDFMassRatio%>%
  filter(Site!="Madison")
KeyOulierPoints <- c(ymd("2021-06-08"),
                     ymd("2021-10-17"),
                     ymd("2021-05-02"),
                     ymd("2021-01-26"))

NonOuliersDF <- MadDF%>%
  mutate(Outlier = ifelse(FlagedOutliers,N1,NA))%>%
           mutate(N1 = NoOutlierVar)%>%
            filter(!(is.na(N1)))

OutLierPlotDF <- MadDF%>%
  mutate(Outlier = ifelse(FlagedOutliers,N1,NA))%>%
           mutate(N1 = NoOutlierVar)%>%
  filter(!(is.na(N1)&is.na(Outlier)))%>%
  ggplot(aes(x=Date))+#Data depends on time 
  geom_line(aes(y=N1,
                color="N1", 
                info = N1),data=NonOuliersDF)+#compares N1
  geom_point(aes(y=Outlier,
                 color="N1 Outlier",
                 info = Outlier))+
  theme_light() +
  scale_color_manual(values=c("#F8766D","#999999"))

ggplotly(OutLierPlotDF)
#"2021-06-08","2021-10-17","2021-05-02","2021-01-26"

Sum of interceptor flows agrees with composite flow except for dates with missing flow information for some interceptors. Purple boxes beneath data are to communicate the Dates with interceptor data partially missing.

MissingFlowData <- FullDFMassRatio.Inter%>%
  group_by(Date)%>%
  summarise(n=n())%>%
  filter(n!=5)%>%
  mutate(MissingSite=TRUE)


FlowPlot <- FullDFMassRatio.Inter%>%
  full_join(MissingFlowData)%>%
  mutate(MissingSite=ifelse(is.na(MissingSite),FALSE,MissingSite))%>%
  filter(!is.na(FlowRate))%>%
  ggplot()+
  geom_col(aes(x=Date,y=FlowRate,fill=Site,shape=MissingSite),position="stack", stat="identity", width = 3) + 
  geom_col(aes(x=Date,y=-.5),data=MissingFlowData,color="Purple",fill="Purple", width = 3)+
  scale_fill_brewer(palette="Spectral") +
  scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c", "#5c9c43", "#3a56d6")) +
  theme_light() +
  geom_point(data=FullDFMassRatio.Mad,aes(x=Date,y=FlowRate),size=2)+
  ylab("Average daily flow (MGD)") +
  geom_hline(yintercept=median(FullDFMassRatio.Mad$FlowRate), linetype = "dashed", color="black") +
  ggtitle("Average daily flow (MMSD)")

ggplotly(FlowPlot)%>%
  add_annotations( text="Site, Full Data For Day", xref="paper", yref="paper",
                  x=1.02, xanchor="left",
                  y=0.8, yanchor="bottom",    # Same y as legend below
                  legendtitle=TRUE, showarrow=FALSE ) %>%
  layout( legend=list(y=0.8, yanchor="top" ) )
MassBalencePlot <- FullDFMassRatio.Inter%>%
  full_join(MissingFlowData)%>%
  mutate(MissingSite=ifelse(is.na(MissingSite),FALSE,MissingSite))%>%
  #filter(!is.na(FlowRate))%>%
  #filter(!MissingSite)%>%
  ggplot()+
  geom_col(aes(x=Date,y=SC2_mass,fill=Site,shape=MissingSite),position="stack", stat="identity", width = 3) + 
  geom_col(aes(x=Date,y=-3e12),data=MissingFlowData,color="Purple",fill="Purple", width = 3)+
  scale_fill_brewer(palette="Spectral") +
  scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c", "#5c9c43", "#3a56d6")) +
  theme_light() +
  geom_point(data=FullDFMassRatio.Mad,aes(x=Date,y=SC2_mass),size=1)+
  ylab("N1 gene copies per day") +
  
  ggtitle("SARS-CoV-2, 24h Mass Loading (Madison)")
  

ggplotly(MassBalencePlot)
N_One_Outliers <- c(ymd("2021-04-15"), ymd("2021-04-26"), ymd("2021-07-26"), ymd("2021-12-13"))
N_TWO_Outliers <- c(ymd("2021-04-15"), ymd("2021-05-20"))



FullDFMassRatio.Inter%>%
  group_by(Date)%>%
  summarise(SC2_InterSum = sum(SC2_mass,na.rm = TRUE),
            nIntercepter = n())%>%
  inner_join(FullDFMassRatio.Mad)%>%
  mutate(Ratio = (SC2_InterSum/SC2_mass)*(5/nIntercepter))%>%
  arrange(Ratio)%>%
  select(Ratio,nIntercepter, everything())#%>%
## # A tibble: 104 x 15
##    Ratio nIntercepter Date       SC2_InterSum Site    FirstConfirmed
##    <dbl>        <int> <date>            <dbl> <chr>            <int>
##  1 0.159            3 2021-04-15      8.02e12 Madison             56
##  2 0.331            5 2021-07-26      8.16e12 Madison             24
##  3 0.340            5 2021-12-13      6.91e13 Madison            144
##  4 0.350            5 2021-05-31      6.25e12 Madison              3
##  5 0.408            5 2021-03-22      9.09e12 Madison             NA
##  6 0.416            5 2021-08-19      1.27e13 Madison             94
##  7 0.471            5 2021-02-11      2.55e13 Madison             71
##  8 0.490            4 2021-06-21      4.56e12 Madison             NA
##  9 0.502            5 2021-09-20      3.28e13 Madison            179
## 10 0.569            5 2021-08-23      2.43e13 Madison             53
## # ... with 94 more rows, and 9 more variables: SevenDayMACases <dbl>, N1 <dbl>,
## #   BCoV <dbl>, PMMoV <dbl>, GeoMeanN12 <dbl>, FlowRate <dbl>,
## #   temperature <dbl>, equiv_sewage_amt <dbl>, SC2_mass <dbl>
  #summarise(mean(Ratio))
LowRatioN_One <- c("2021-04-15","2021-07-26","2021-12-13","2021-05-31","2021-03-22")

LowRatioN_TWO <- c("2021-04-15","2021-07-26","2021-12-13","2021-10-04","2021-03-22")


InterceptRatioPlot <- FullDFMassRatio.Inter%>%
  group_by(Date)%>%
  summarise(SC2_InterSum = sum(SC2_mass,na.rm = TRUE),
            nIntercepter = n())%>%
  inner_join(FullDFMassRatio)%>%
  mutate(Ratio = (SC2_mass/SC2_InterSum)*(nIntercepter/5))%>%
  arrange(Ratio)%>%
  select(Ratio,nIntercepter, everything())%>%
  ggplot(aes(x=Date))+
  geom_line(aes(y= Ratio,color = Site))

ggplotly(InterceptRatioPlot)
IntercepterOverLay <- FullDFMassRatio.Inter%>%
  complete(Date,Site)%>%
  full_join(FullDFMassRatio)%>%
  ggplot(aes(x=Date))+
  geom_line(aes(y=N1,color = Site))+
  scale_y_log10()
ggplotly(IntercepterOverLay)
N1BalencePlot <- FullDFMassRatio.Inter%>%
  complete(Date,Site)%>%
  ggplot()+
  geom_col(aes(x=Date,y=N1,fill=Site),
           position = "dodge", stat="identity", width = 2) + 
  geom_col(aes(x=Date,y=-3e3),
           data=MissingFlowData,color="Purple",fill="Purple", width = 2)+
  scale_fill_brewer(palette="Spectral") +
  scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c", "#5c9c43", "#3a56d6")) +
  theme_light() +
  geom_point(data=FullDFMassRatio.Mad,aes(x=Date,y=N1),size=1)+
  scale_y_log10()+
  ylab("N1 gene copies per day") +
  
  ggtitle("SARS-CoV-2, 24h Mass Loading (Madison)")
  

ggplotly(N1BalencePlot)
FullDFMassRatio.Inter%>%
  group_by(Site)%>%
  summarise(N1 = mean(N1,na.rm=TRUE),
            Flow = mean(FlowRate,na.rm=TRUE),
            SC2 = mean(SC2_mass,na.rm=TRUE),
            AntiSC2 = mean(N1/FlowRate,na.rm=TRUE))#%>%
## # A tibble: 5 x 5
##   Site          N1  Flow     SC2 AntiSC2
##   <chr>      <dbl> <dbl>   <dbl>   <dbl>
## 1 MMSD-P11 294443.  8.58 9.04e12  34459.
## 2 MMSD-P18 403384. 11.2  1.69e13  36331.
## 3 MMSD-P2  313275.  5.85 6.57e12  53950.
## 4 MMSD-P7  356002.  4.40 5.86e12  87100.
## 5 MMSD-P8  383066.  6.27 8.66e12  61481.
  #ungroup()%>%
  #summarise(VarN1 = var(N1)/mean(N1)^2,
  #          VarFlow = var(Flow)/mean(Flow)^2,
  #          VarSC2 = var(SC2)/mean(SC2)^2,
  #          VarASC2 = var(AntiSC2)/mean(AntiSC2)^2)


#Graph paportion of data intercepters are

arranged plots with lines to communicate where flagged outliers are. Red Lines show Where there was a flagged on a day that also has collected interceptor data.

StartRange <- mdy("1-20-2021")
EndRange <- mdy("1-10-2022")

AllOuliersDates <- OutlierDF[["Date"]]
OutlierDatesIntercepters <- unique(inner_join(FullDFMassRatio.Inter,OutlierDF["Date"],by=c("Date"))$Date)

a <- OutLierPlotDF+
  scale_x_date(limits = c(StartRange,EndRange))+
  #geom_vline(xintercept = AllOuliersDates)+
  geom_vline(xintercept = OutlierDatesIntercepters,color="Red")+
  #geom_vline(xintercept = MissingIntercepter$Date,color="Red")+
  theme(title = element_blank(),
        axis.text.x = element_blank(), 
        axis.title.x = element_blank(),
        legend.position="none")
b <- MassBalencePlot+
  scale_x_date(limits = c(StartRange,EndRange))+
  #geom_vline(xintercept = (AllOuliersDates))+
  geom_vline(xintercept = (OutlierDatesIntercepters),color="Red")+
  #geom_vline(xintercept = MissingIntercepter$Date,color="Red")+
  theme(title = element_blank(),
    axis.text.x = element_blank(), 
        axis.title.x = element_blank(),
    legend.position="none")
c <- FlowPlot+
  scale_x_date(limits = c(StartRange,EndRange))+
  #geom_vline(xintercept = AllOuliersDates)+
  geom_vline(xintercept = OutlierDatesIntercepters,color="Red")+
  theme(title = element_blank(),
        legend.position="none")



ggarrange(a,b,c,nrow=3,align ="v",legend.grob=get_legend(MassBalencePlot),legend="right")

#Cause of missing data
OutlierDatesIntercepters
## [1] "2021-04-15" "2021-04-26" "2021-07-26" "2021-12-13"
N_One_Outliers <- c(ymd("2021-04-15"), ymd("2021-04-26"), ymd("2021-07-26"), ymd("2021-12-13"))
N_TWO_Outliers <- c(ymd("2021-04-15"), ymd("2021-05-20"))
LowRatioN_One <- c("2021-04-15","2021-07-26","2021-12-13","2021-05-31","2021-03-22")
LowRatioN_TWO <- c("2021-04-15","2021-07-26","2021-12-13","2021-10-04","2021-03-22")

AllIntercepterComp_POI <- c(N_One_Outliers,N_TWO_Outliers,LowRatioN_One,LowRatioN_TWO)

FullDF%>%
  filter(Date %in% AllIntercepterComp_POI)%>%
  select(-FirstConfirmed,SevenDayMACases)
##          Date     Site SevenDayMACases      N1   BCoV     PMMoV GeoMeanN12
## 1  2021-03-22 MMSD-P11        3.600000   74718  7.233  13995222      74718
## 2  2021-03-22 MMSD-P18        3.600000   87689  9.071   5087805      87689
## 3  2021-03-22  MMSD-P2        6.000000   41663  6.174  12906803      41663
## 4  2021-03-22  MMSD-P7        7.400000   43453  0.741  13088946      43453
## 5  2021-03-22  MMSD-P8        8.600000   51290  5.706  19118442      51290
## 6  2021-04-15 MMSD-P11       11.285714  157424  3.626  45532010     157424
## 7  2021-04-15 MMSD-P18       10.142857      NA     NA        NA         NA
## 8  2021-04-15  MMSD-P2       10.285714   48521  3.533  26162914      48521
## 9  2021-04-15  MMSD-P7       11.428571      NA     NA        NA         NA
## 10 2021-04-15  MMSD-P8       11.857143   60518  5.001  20604026      60518
## 11 2021-04-26 MMSD-P11        4.857143  173731  8.414  32400873     173731
## 12 2021-04-26 MMSD-P18        5.333333 1761146 19.407  18185986    1761146
## 13 2021-04-26  MMSD-P2        5.833333   66599 17.244 169992062      66599
## 14 2021-04-26  MMSD-P7        7.666667  145908 12.930  13107287     145908
## 15 2021-04-26  MMSD-P8        8.166667  181856 11.324  20215977     181856
## 16 2021-05-20 MMSD-P11        2.714286    9997  2.871  66800100       9997
## 17 2021-05-20 MMSD-P18        2.857143 1945496  6.505  15415932    1945496
## 18 2021-05-20  MMSD-P2        3.571429  608010  9.994  44245555     608010
## 19 2021-05-20  MMSD-P7        3.571429   56388  2.936  32473853      56388
## 20 2021-05-20  MMSD-P8        3.000000   15247  5.676  18441997      15247
## 21 2021-05-31 MMSD-P11        1.428571    3972  3.353  31740972       3972
## 22 2021-05-31 MMSD-P18        1.333333  120744  4.302  31643578     120744
## 23 2021-07-26 MMSD-P11        4.428571   56082  1.730  34062331      56082
## 24 2021-07-26 MMSD-P18        3.714286   96259  4.820  11582896      96259
## 25 2021-07-26  MMSD-P2        5.428571   87920  7.879  42669683      87920
## 26 2021-07-26  MMSD-P7        6.285714   22234  8.842  21276643      22234
## 27 2021-07-26  MMSD-P8        7.571429   14820  9.955  24223424      14820
## 28 2021-10-04 MMSD-P11       21.857143  132758  3.220  30769148     132758
## 29 2021-10-04 MMSD-P18       25.142857  100502  0.800   9759807     100502
## 30 2021-10-04  MMSD-P2       26.285714  220932  3.560  13670096     220932
## 31 2021-10-04  MMSD-P7       29.285714  137332  3.210  14498503     137332
## 32 2021-10-04  MMSD-P8       27.857143  172914  1.680  20682700     172914
## 33 2021-12-13 MMSD-P11       19.000000  458847 16.130  86006923     458847
## 34 2021-12-13 MMSD-P18       21.000000  722770 12.720  13380021     722770
## 35 2021-12-13  MMSD-P2       34.000000  429555  2.325  33305288     429555
## 36 2021-12-13  MMSD-P7       49.285714  300888  2.231  31784189     300888
## 37 2021-12-13  MMSD-P8       56.285714  443364  8.883  18625898     443364
## 38 2021-03-22  Madison       35.000000  162650  1.035  16649039     162650
## 39 2021-04-15  Madison       50.285714  571269  3.684  23628196     571269
## 40 2021-04-26  Madison       39.333333  675748 16.079  28619982     675748
## 41 2021-05-20  Madison       19.666667  224021 14.337  49311472     224021
## 42 2021-05-31  Madison        6.000000  147362  3.645  37833422     147362
## 43 2021-07-26  Madison       43.333333  186460  3.589  30230924     186460
## 44 2021-10-04  Madison       90.000000  244225  2.350  12107928     244225
## 45 2021-12-13  Madison      229.333333 1498471 13.886  24525524    1498471
## 46 2021-05-31  MMSD-P2              NA   13114  4.599  29170047      13114
## 47 2021-05-31  MMSD-P7              NA    8117  1.276  30386513       8117
## 48 2021-05-31  MMSD-P8              NA   58037  5.907  19836360      58037
##    FlowRate temperature equiv_sewage_amt
## 1      9.02          NA            1.000
## 2     11.52          NA            1.000
## 3      5.81          NA            1.000
## 4      3.67          NA            1.000
## 5      6.18          NA            1.000
## 6      8.91          NA            1.000
## 7        NA          NA               NA
## 8      6.33          NA            1.000
## 9        NA          NA               NA
## 10     6.78          NA            1.000
## 11     8.62          NA            0.250
## 12    10.79          NA            0.250
## 13     5.84          NA            0.250
## 14     4.41          NA            0.250
## 15     6.38          NA            0.250
## 16     8.61          NA            0.625
## 17    11.32          NA            0.625
## 18     5.52          NA            0.625
## 19     3.65          NA            0.625
## 20     6.88          NA            0.625
## 21     7.88          NA            0.625
## 22     9.98          NA            0.625
## 23     8.44          NA            1.000
## 24    10.28          NA            1.000
## 25     5.74          NA            1.000
## 26     4.52          NA            1.000
## 27     5.95          NA            1.000
## 28     8.47          NA            0.625
## 29    10.08          NA            0.625
## 30     6.74          NA            0.625
## 31     4.35          NA            0.625
## 32     6.27          NA            0.625
## 33     8.16          NA            0.625
## 34    10.74          NA            0.625
## 35     6.14          NA            0.625
## 36     4.65          NA            0.625
## 37     6.09          NA            0.625
## 38    36.20          NA            1.000
## 39    39.02          NA            1.000
## 40    36.04          NA            0.250
## 41    35.98          NA            0.625
## 42    32.02          NA            0.625
## 43    34.93          NA            1.000
## 44    35.91          NA            0.625
## 45    35.78          NA            0.625
## 46     4.74          NA            0.625
## 47     3.91          NA            0.625
## 48     5.51          NA            0.625